
# Build the base query for the new revision cancer events table.
#
# con: connection handed to tbl().
#
# Returns the tbl() query on the `cancer_events_full` table (name looked up
# through get_eol_table_names()) in schema "path", restricted to event
# numbers up to 7, passed through do_intensity(), and narrowed to the
# columns listed in the final select().
get_revision_cancer_events_full_data <- function(con) {
   events_table <- dbplyr::in_schema("path",
                                     get_eol_table_names()$cancer_events_full)

   # Only the first seven events per id are kept.
   first_events <- filter(tbl(con, events_table), EVE_event_number <= 7)
   with_intensity <- do_intensity(first_events)

   select(
      with_intensity,
      # identifiers, dates and demographics
      id_var,
      S_index_date_XX,
      DMG_date_of_death_XX,
      DMG_age,
      CNRS_topo_grouped_desc,
      DMG_died_within_365d,
      # cost
      UTL_f365d_total_cost,
      # current event
      EVE_intensity,
      EVE_event_number,
      EVE_event_time,
      # intensity of the preceding events
      EVE_lag1_intensity,
      EVE_lag2_intensity,
      EVE_lag3_intensity,
      EVE_lag4_intensity,
      EVE_lag5_intensity,
      EVE_lag6_intensity,
      EVE_lag7_intensity
   )
}


# Summarise cost, predicted-probability and event-time statistics per node.
#
# dt      : data.table holding days_lived, monthly_cost, monthly_cost_no_dx,
#           monthly_cost_no_current, phat, phat0, EVE_event_time and every
#           column named in `by_cols`.
# by_cols : character vector of grouping columns, in output order.
# min_n   : nodes with fewer rows than this are dropped.
#
# Returns a data.table with one row per group: the grouping columns, N, and
# the mean / sd of each measure. In the output `EVE_intensity` is renamed to
# `EVE_category` and `EVE_lag<k>_intensity_new` to `EVE_lag<k>_category_new`.
.summarise_events_tree <- function(dt, by_cols, min_n = 10) {
   node_stats <- dt[days_lived >= 0,
                    .(.N,
                      fwd_cost_avg = mean(monthly_cost),
                      fwd_cost_sd = sd(monthly_cost),
                      fwd_cost_no_dx_avg = mean(monthly_cost_no_dx),
                      fwd_cost_no_dx_sd = sd(monthly_cost_no_dx),
                      fwd_cost_no_current_avg = mean(monthly_cost_no_current),
                      fwd_cost_no_current_sd = sd(monthly_cost_no_current),
                      phat_avg = mean(phat),
                      phat_sd = sd(phat),
                      phat0_avg = mean(phat0),
                      phat0_sd = sd(phat0),
                      event_time_avg = mean(EVE_event_time),
                      event_time_sd = sd(EVE_event_time)),
                    by = by_cols][N >= min_n]

   # Expose the intensity grouping columns under their "category" names.
   out_names <- sub("^EVE_intensity$", "EVE_category", by_cols)
   out_names <- sub("_intensity_new$", "_category_new", out_names)
   data.table::setnames(node_stats, old = by_cols, new = out_names)

   node_stats
}


# Export model diagnostics and the per-node "events tree" summary tables.
#
# con             : connection forwarded to
#                   get_revision_cancer_events_full_data().
# age_cancer_type : table with columns `Sample` (matched against
#                   CNRS_topo_main_groups) and `med_age`; used to split each
#                   node into above / below median age.
#
# Files written to the working directory:
#   calibration_events.pdf, AUC.csv, feature_importance.csv,
#   events_tree.csv, tree_median_age.csv
#
# Returns the value of the final write.csv() call; the function is called
# for its side effects.
do_events_tree <- function(con,
                           age_cancer_type = do_table_mainTopo_descStats(data_cnr = new_test_cnr)[7:27, .(Sample,med_age)]
                           ) {

   cnr_events_res <- get_load_new(file_name = "revision_events_intensity.RData")

   # --- model diagnostics ---------------------------------------------------
   # Pass the plot to ggsave() explicitly: `plot + ggsave()` is an unofficial
   # hack that errors on ggplot2 >= 3.3.4.
   calibration <- calibration_plot(cnr_events_res$preds_test,
                                   'preds_after_bayes', 'true_value', 20)
   ggsave(filename = "calibration_events.pdf", plot = calibration,
          width = 10, height = 7)

   write.csv(cnr_events_res$roc_auc, "AUC.csv")

   cnr_events_res$feature_importance %>%
      slice_head(n = 50) %>%
      write.csv("feature_importance.csv")

   events <- cnr_events_res$preds_test %>% data.table()

   # --- event-level data ----------------------------------------------------
   # NOTE(review): CNRS_topo_main_groups, used below, is not among the columns
   # selected by get_revision_cancer_events_full_data(); presumably it is
   # added by add_grouped_cancer_type() -- confirm.
   events_tree <- get_revision_cancer_events_full_data(con) %>%
      add_grouped_cancer_type() %>%
      drop_dead_on_index_date() %>%
      filter(S_index_date_XX < DMG_date_of_death_XX | is.na(DMG_date_of_death_XX)) %>%
      collect() %>%
      data.table()

   # 365 days when DMG_died_within_365d is "0", otherwise the number of days
   # from index date to death date plus one.
   events_tree[, days_lived := ifelse(DMG_died_within_365d == "0", 365,
                                      as.numeric(difftime(DMG_date_of_death_XX,
                                                          S_index_date_XX,
                                                          units = "days")) + 1)]

   cost_no_first <- run_query(
      "SELECT * FROM tempTable_cost_1yr_after_cancer_events_day_of_event") %>%
      data.table()

   events_tree <- merge(x = events_tree,
                        y = cost_no_first[, .(id_var,
                                              EVE_event_number,
                                              total_cost,
                                              total_cost_no_dx,
                                              total_cost_no_current)],
                        by = c("id_var", "EVE_event_number"))

   # Cost per 30 days lived; negative costs are floored at 0.
   # NOTE(review): monthly_cost uses UTL_f365d_total_cost while the merged
   # `total_cost` column is never used -- confirm this is intended.
   events_tree[, monthly_cost := if_else(UTL_f365d_total_cost >= 0, UTL_f365d_total_cost, 0) / (days_lived / 30)]
   events_tree[, monthly_cost_no_dx := if_else(total_cost_no_dx >= 0, total_cost_no_dx, 0) / (days_lived / 30)]
   events_tree[, monthly_cost_no_current := if_else(total_cost_no_current >= 0, total_cost_no_current, 0) / (days_lived / 30)]

   # Keep every prediction row (all.y), attaching event data where available.
   event_res <- merge(
      x = events_tree,
      y = events[, .(id_var, true_value,
                     EVE_event_number,
                     phat = preds_after_bayes,
                     phat0 = EVE_phat0)],
      by = c("id_var", "EVE_event_number"),
      all.x = FALSE,
      all.y = TRUE)

   # --- lagged intensities --------------------------------------------------
   # Rows must be in event order within id before shifting.
   event_res <- event_res[order(id_var, EVE_event_number)]

   # data.table::shift() is used rather than a bare lag(): which lag() is
   # found depends on package attach order (stats::lag does not shift values).
   lag_steps <- 1:7
   lag_cols <- paste0("EVE_lag", lag_steps, "_intensity_new")
   event_res[, (lag_cols) := data.table::shift(EVE_intensity, n = lag_steps),
             by = "id_var"]

   # --- harmonise cancer group labels ---------------------------------------
   event_res[CNRS_topo_main_groups == "Lymph nodes (secondary?)", CNRS_topo_main_groups := "Lymph nodes"]
   event_res[CNRS_topo_main_groups == "Others", CNRS_topo_main_groups := "Other"]
   event_res[, CNRS_topo_main_groups := tools::toTitleCase(CNRS_topo_main_groups)]

   # --- tree summary --------------------------------------------------------
   path_cols <- c("EVE_event_number", "EVE_intensity", lag_cols)

   event_group <- .summarise_events_tree(
      event_res,
      by_cols = c("CNRS_topo_main_groups", path_cols))

   write.csv(event_group, "events_tree.csv")

   # --- tree summary split by median age ------------------------------------
   # Inner join: rows whose cancer group has no entry in age_cancer_type are
   # dropped, which the check below reports.
   event_res_med_age <- merge(event_res,
                              age_cancer_type,
                              by.x = "CNRS_topo_main_groups",
                              by.y = "Sample")

   if (nrow(event_res) != nrow(event_res_med_age)) {
      warning("Merging with age_cancer_type changed the row count from ",
              nrow(event_res), " to ", nrow(event_res_med_age),
              "; check that every CNRS_topo_main_groups value has exactly ",
              "one matching Sample.",
              call. = FALSE)
   }

   event_res_med_age[, above_median_age := if_else(DMG_age > med_age, "Above", "Below")]

   event_group_median <- .summarise_events_tree(
      event_res_med_age,
      by_cols = c("CNRS_topo_main_groups", "above_median_age", path_cols))

   write.csv(event_group_median, "tree_median_age.csv")

}


